{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "a6733196",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "dd19e671",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>是否愿意下载</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>用户编号</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     是否愿意下载\n",
       "用户编号       \n",
       "0       Yes\n",
       "1       Yes\n",
       "2        No\n",
       "3       Yes\n",
       "4        No"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the download-intent table: the CSV is decoded as GBK and its first column becomes the index.\n",
    "data = pd.read_csv(\n",
    "    'data/user_download.csv',\n",
    "    encoding='gbk',\n",
    "    index_col=0,\n",
    ")\n",
    "# Show the first rows as the cell output.\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "68011599",
   "metadata": {},
   "source": [
    "1.检测与处理重复值"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "be79cd7e",
   "metadata": {},
   "source": [
    "对一个特征进行去重"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "4bccd4a9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "用户编号\n",
       "0     Yes\n",
       "2      No\n",
       "27    NaN\n",
       "Name: 是否愿意下载, dtype: object"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Keep the first occurrence of each distinct answer; NaN is retained as its own value.\n",
    "download = data.loc[:, '是否愿意下载'].drop_duplicates(keep='first')\n",
    "download"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "ffb87c8d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of distinct values kept by drop_duplicates (the NaN entry is counted too).\n",
    "len(download)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "823dbf9c",
   "metadata": {},
   "source": [
    "对多个特征去重"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "d0d7daad",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2235, 7)"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the full user table and report its (rows, columns) shape.\n",
    "# NOTE(review): the name `all` shadows Python's builtin all(); it is kept here because\n",
    "# later cells reference it - renaming needs a coordinated change across the notebook.\n",
    "all = pd.read_csv('data/user_all_info.csv')\n",
    "all.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "da46ac8e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2172, 7)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Shape after dropping rows that are duplicated across every column (the original frame is not modified).\n",
    "all.drop_duplicates().shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "db5f37c0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2172, 7)"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# De-duplicate using only the two ID columns as the key, then report the resulting shape.\n",
    "key_columns = ['用户编号', '编号']\n",
    "all.drop_duplicates(subset=key_columns).shape"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7136df89",
   "metadata": {},
   "source": [
    "特征重复"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "64315c2d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>用户编号</th>\n",
       "      <th>年龄</th>\n",
       "      <th>性别</th>\n",
       "      <th>居住类型</th>\n",
       "      <th>编号</th>\n",
       "      <th>每月支出</th>\n",
       "      <th>是否愿意下载</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>男</td>\n",
       "      <td>城市</td>\n",
       "      <td>0</td>\n",
       "      <td>6807.50</td>\n",
       "      <td>Yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>30.0</td>\n",
       "      <td>男</td>\n",
       "      <td>城市</td>\n",
       "      <td>1</td>\n",
       "      <td>4780.45</td>\n",
       "      <td>Yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>-3.2</td>\n",
       "      <td>男</td>\n",
       "      <td>农村</td>\n",
       "      <td>3</td>\n",
       "      <td>5011.06</td>\n",
       "      <td>Yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>5</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>男</td>\n",
       "      <td>农村</td>\n",
       "      <td>5</td>\n",
       "      <td>4899.04</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>10</td>\n",
       "      <td>23.0</td>\n",
       "      <td>男</td>\n",
       "      <td>城市</td>\n",
       "      <td>10</td>\n",
       "      <td>6816.02</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   用户编号    年龄 性别 居住类型  编号     每月支出 是否愿意下载\n",
       "0     0   NaN  男   城市   0  6807.50    Yes\n",
       "1     1  30.0  男   城市   1  4780.45    Yes\n",
       "2     3  -3.2  男   农村   3  5011.06    Yes\n",
       "3     5  -1.0  男   农村   5  4899.04     No\n",
       "4    10  23.0  男   城市  10  6816.02     No"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first rows of the table to see which columns are available.\n",
    "all.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "ff593fb2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>年龄</th>\n",
       "      <th>每月支出</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>年龄</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.014168</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>每月支出</th>\n",
       "      <td>0.014168</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            年龄      每月支出\n",
       "年龄    1.000000  0.014168\n",
       "每月支出  0.014168  1.000000"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Pearson correlation (the pandas default) between age and monthly spend.\n",
    "all.loc[:, ['年龄', '每月支出']].corr(method='pearson')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "d6648e18",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>编号</th>\n",
       "      <th>用户编号</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>编号</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>用户编号</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       编号  用户编号\n",
       "编号    1.0   1.0\n",
       "用户编号  1.0   1.0"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Kendall rank correlation between the two ID columns.\n",
    "all.loc[:, ['编号', '用户编号']].corr(method='kendall')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "9c3c1ecf",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>年龄</th>\n",
       "      <th>每月支出</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>年龄</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.011119</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>每月支出</th>\n",
       "      <td>0.011119</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            年龄      每月支出\n",
       "年龄    1.000000  0.011119\n",
       "每月支出  0.011119  1.000000"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# '居住类型' is a text column, so keep only the numeric columns before correlating.\n",
    "# Older pandas silently dropped non-numeric columns in corr(); pandas 2.x raises instead.\n",
    "all[['年龄','每月支出','居住类型']].select_dtypes(include='number').corr(method='kendall')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0d4d7d5d",
   "metadata": {},
   "source": [
    "定义求取特征是否完全相同的矩阵函数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "5aa4fb5a",
   "metadata": {},
   "outputs": [],
   "source": [
    "def feature_equals(df):\n",
    "    df_equals = pd.DataFrame([])\n",
    "    for i in df.columns:\n",
    "        for j in df.columns:\n",
    "            df_equals.loc[i,j] = df.loc[:,i].equals(df.loc[:,j])\n",
    "    return df_equals"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "61d5ffff",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>用户编号</th>\n",
       "      <th>年龄</th>\n",
       "      <th>性别</th>\n",
       "      <th>居住类型</th>\n",
       "      <th>编号</th>\n",
       "      <th>每月支出</th>\n",
       "      <th>是否愿意下载</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>用户编号</th>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>年龄</th>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>性别</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>居住类型</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>编号</th>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>每月支出</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>是否愿意下载</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         用户编号     年龄     性别   居住类型     编号   每月支出 是否愿意下载\n",
       "用户编号     True  False  False  False   True  False  False\n",
       "年龄      False   True  False  False  False  False  False\n",
       "性别      False  False   True  False  False  False  False\n",
       "居住类型    False  False  False   True  False  False  False\n",
       "编号       True  False  False  False   True  False  False\n",
       "每月支出    False  False  False  False  False   True  False\n",
       "是否愿意下载  False  False  False  False  False  False   True"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Pairwise column-equality matrix for the full table; True off the diagonal marks a duplicated feature.\n",
    "feature_equals(all)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a573772c",
   "metadata": {},
   "source": [
    "检测与处理缺失值"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7f0cb366",
   "metadata": {},
   "source": [
    "检测缺失值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "696cc480",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "用户编号       0\n",
       "年龄         6\n",
       "性别         0\n",
       "居住类型      22\n",
       "编号         0\n",
       "每月支出      20\n",
       "是否愿意下载    20\n",
       "dtype: int64"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Count the missing values in each column.\n",
    "all.isna().sum(axis=0)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3d117317",
   "metadata": {},
   "source": [
    "处理缺失值-删除法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "40c1ab4a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2235, 7)"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Shape of the table before any rows with missing values are dropped.\n",
    "all.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "d8a927e2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Drop every row that has at least one missing value; `all` itself is left untouched.\n",
    "all2 = all.dropna(how='any', axis='index')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "496d1af3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2169, 7)"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Shape after the rows containing missing values were removed.\n",
    "all2.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "bff6b493",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Create the output directory first so the export also works when 'tmp/' does not exist yet.\n",
    "os.makedirs('tmp', exist_ok=True)\n",
    "# Write the cleaned table without the row index.\n",
    "all2.to_csv('tmp/all2.csv',index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5531ec89",
   "metadata": {},
   "source": [
    "处理缺失值-替换法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "c7b5ded7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Column mean of monthly spend; missing entries are skipped (pandas default).\n",
    "mean_num = all.loc[:, '每月支出'].mean(skipna=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "bab68a2f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Replace missing monthly-spend values with the column mean; this overwrites the column in `all`.\n",
    "all['每月支出'] = all['每月支出'].fillna(value=mean_num)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "fb81dfdb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Check how many missing values remain in the column after the fill.\n",
    "all['每月支出'].isna().sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "faf98d3c",
   "metadata": {},
   "source": [
    "处理缺失值-插值法"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f949892d",
   "metadata": {},
   "source": [
    "线性插值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "38e0632b",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from scipy.interpolate import interp1d"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "772507d2",
   "metadata": {},
   "outputs": [],
   "source": [
    "x = np.array([1,2,3,4,5,8,9,10])\n",
    "y1 = np.array([2,8,18,32,50,78,100,130])\n",
    "y2 = np.array([3,5,7,9,11,17,19,21])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "282d522a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Piecewise-linear interpolant through the (x, y1) samples.\n",
    "linear1 = interp1d(x,y1,kind='linear')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "8fb22249",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Piecewise-linear interpolant through the (x, y2) samples.\n",
    "linear2 = interp1d(x,y2,kind='linear')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "d6bd3235",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([59.33333333, 68.66666667])"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Estimate y1 at x = 6 and x = 7, the two points missing from the sample grid.\n",
    "linear1([6,7])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "7de6aed4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([13., 15.])"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Estimate y2 at x = 6 and x = 7, the two points missing from the sample grid.\n",
    "linear2([6,7])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "40e1e6ec",
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "67cdef1f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD4CAYAAAAXUaZHAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAf80lEQVR4nO3de4DVc/7H8edbJRPbDluiCRXJumwb435NFJJGlNCKlX4tWqwtYrEurXbHssQipCEhushlG5kQ2trKRCXjFjVNNMkkNen2/v3xOWXKpMw5M99zzrwe/5xzvufyfe9Zvfr0OZ/v+2PujoiIpJcdoi5AREQST+EuIpKGFO4iImlI4S4ikoYU7iIiaahu1AUANGrUyJs3bx51GSIiKWXmzJlL3b1xZc8lRbg3b96cGTNmRF2GiEhKMbMvtvacpmVERNKQwl1EJA0p3EVE0pDCXUQkDSncRUTSUFKslhERqW3GFS4iN7+IkrJymmZm0L9ja3LaZiXs8xXuIiI1bFzhIgaOmU352vUALCorZ+CY2QAJC3hNy4iI1LDc/CLK167nopkvcsznswAoX7ue3PyihJ1D4S4iUsNKyso5rPgDbi54hG6zX9vseKIo3EVEaljr+uu498Vcin/ZhJs6XL7peNPMjISdQ3PuIiI1yZ3H3hnK7t8t45yeuXxXvwEAGfXq0L9j64SdRuEuIlKThg4lq+AV5lx1I1/v0QbTahkRkRQ3Zw5cfTV07MjBd9/GOztU38y45txFRGrCqlXQowf88peQlwfVGOygkbuISM34059g7lzIz4cmTar9dBq5i4hUt9Gj4eGHYcAA6NChRk6pcBcRqU5ffAG9e8MRR8Add9TYaRXuIiLVZd06uOACWL8enn4a6tWrsVNrzl1EpLrceitMmQIjR0LLljV6ao3cRUSqw+uvw6BBcMklcP75NX56hbuISKItXQo9e8L++8OQIZGUsM1wN7NhZrbEzOZUOJZrZh+a2ftmNtbMMis8N9DMPjGzIjPrWE11i4gkJ/cwWl+6FJ55BnbeOZIytmfkPhw4bYtjE4GD3f03wEfAQAAzOxDoARwUe8+/zaxOwqoVEUl2990HL70Ed90Fv/1tZGVsM9zdfTKwbItjr7r7utjDqUCz2P0uwDPu/r27zwc+AY5IYL0iIsmrsDCsZe/cGa68MtJSEjHn/nvgP7H7WcDCCs8Vx479iJn1MbMZZjajtLQ0AWWIiETou+9Ce4FGjWDYMDCLtJy4wt3MbgTWAU9tPFTJy7yy97r7UHfPdvfsxo0bx1OGiEj0+vWDjz+Gp54KAR+xKq9zN7NewJlAe3ffGODFwF4VXtYMKKl6eSIiKWDkSBg+HG66CU46KepqgCqO3M3sNOA64Cx3X1XhqfFADzOrb2YtgFbA/+IvU0QkSX36KfTtC8cdBzffHHU1m2xz5G5mTwMnAY3MrBi4hbA6pj4w0cK80lR37+vuc81sFPABYbrmCndfX13Fi4hEas2aMM9et26YjqmbPBf9b7MSd6/s0qrHfuL1g4BB8RQlIpISbrwRZswIXR/33jvqajajK1RFRKpiwoSwlr1vX+jaNepqfkThLiLyc335JfTqBQcfDHffHXU1lUqeCSIRkVSwYQNcdBGsWAGTJkFGRtQVVUrhLiLyc9x1F0ycGHZWOuigqKvZKk3LiIhsr2nTwo+o3brBZZdFXc1PUriLiGyP5ctDX/asLBg6NPL2AtuiaRkRkW1xD6tiFiyAyZMhMzPqirZJ4S4isi2PPx56s99xBxxzTNTVbBdNy4iI/JR580JTsHbt4Prro65muyncRUS2ZvXqMM/eoAGMGAF1UmfvIU3LiIhszYAB8N57YWelpk2jruZn0chdRKQy48eHza2vvho6dYq6mp9N4S4isqXi4rDJddu2MHhw1NVUicJdRKSi9euhZ0/4/vuwQqZ+/agrqhLNuYuIVDRoELz5ZthZaf/9o66myjRy
FxHZ6K234NZb4cILQ3OwFKZwFxEBWLYshHqLFvDgg0nfXmBbNC0jIuIOvXuHPu1TpsAvfhF1RXFTuIuIPPQQjB0b2vlmZ0ddTUJoWkZEarfZs+Gaa+C008JtmlC4i0jttWoVnHde6PI4fDjskD6RuM3/JWY2zMyWmNmcCsd2M7OJZvZx7HbXCs8NNLNPzKzIzDpWV+EiInG7+urQGOzJJ6FJk6irSajt+WtqOHDaFseuBwrcvRVQEHuMmR0I9AAOir3n32aWOp12RCTtjStcxLGDJ3FFzkB45BE+uvhyOPXUqMtKuG2Gu7tPBpZtcbgLkBe7nwfkVDj+jLt/7+7zgU+AIxJTqohIfMYVLmLgmNnYF59z54QhFO7ZmnP2PJ1xhYuiLi3hqjrB1MTdFwPEbnePHc8CFlZ4XXHsmIhI5HLzi8hYvoxHRt8O7vQ7qz8rNhi5+UVRl5Zwif71oLJV/17pC836mNkMM5tRWlqa4DJERH6svORLnnrmRlp8U0Lfs2+gOHMPAErKyiOuLPGqGu5fmdmeALHbJbHjxcBeFV7XDCip7APcfai7Z7t7duPGjatYhojIdlq6lGefu4kW35Rw6Tk3M6X5bzc91TQzI7q6qklVw3080Ct2vxfwQoXjPcysvpm1AFoB/4uvRBGROH39NZxyCi2/WcQV3W/hnQrBnlGvDv07to6utmqyzStUzexp4CSgkZkVA7cAg4FRZnYpsADoBuDuc81sFPABsA64wt3XV1PtIiLb9vXX0L49fPghdcaPp3Pjg/gwv4iSsnKaZmbQv2Nrctqm30+D5l7plHiNys7O9hkzZkRdhoikm9iInXnzws5KHTpEXVFCmdlMd6+0X0L6XI4lIlLRsmVh/fq8efDCC2kX7NuixmEikn42BvvcuSHYO9a+i+U1cheR9PLNNyHY58yBceNCQ7BaSOEuIumjYrCPHQunnx51RZFRuItIeigrC/Pqs2fDmDFwxhlRVxQpzbmLSOrbGOzvvReCvVOnqCuKnEbuIpLali8PP5jOmgWjR8OZZ0ZdUVJQuItI6lq+PIzYCwvh+eehc+eoK0oaCncRSU0bR+yFhfDcc3DWWVFXlFQ05y4iqefbb8MSx5kzw4i9S5eoK0o6GrmLSGrZGOwzZoQRu4K9Ugp3EUkdK1aEtevTp8OoUZCTE3VFSUvTMiKSGlasCCP2adNCsJ99dtQVJTWN3EUk+W0csU+bBs8+C127Rl1R0lO4i0hyW7EiXG06dSo88wycc07UFaUEhbuIJK/vvgvB/t//wtNPw7nnRl1RylC4i0hyqhjsI0dCt25RV5RSFO4iknxWrgz9YaZMgaeegu7do64o5Wi1jIgkl43B/vbbIdjPOy/qilKSRu4ikjxWrgyNv956C0aMgB49oq4oZSncRSQ5rFoVGn9NngxPPgnnnx91RSlN4S4i0dsY7G++CU88ARdcEHVFKS+ucDeza8xsrpnNMbOnzWwnM9vNzCaa2cex210TVayIpKFVq0JHxzfegLw8uPDCqCtKC1UOdzPLAv4IZLv7wUAdoAdwPVDg7q2AgthjEZEfKy8Pjb8mTYLhw6Fnz6grShvxTsvUBTLMrC7QACgBugB5sefzgJw4zyEi6ai8PIzYCwpCsP/ud1FXlFaqHO7uvgi4C1gALAaWu/urQBN3Xxx7zWJg98reb2Z9zGyGmc0oLS2tahkikoo2jtgLCuDxx+Gii6KuKO3EMy2zK2GU3gJoCuxsZtv9byp3H+ru2e6e3bhx46qWISIpYlzhIo4dPIkDrh3DtDYn4K+9BsOGQa9eUZeWluKZljkFmO/upe6+FhgDHAN8ZWZ7AsRul8RfpoiksnGFixg4ZjZLS8t4eOwgDv94Jjd2uppxbU6NurS0Fc8VqguAo8ysAVAOtAdmACuBXsDg2O0L8RYpIqlt+Ki3ueLNMfR4L5/dVn3Ldaf347mD2vNmfhE5bbOiLi8tVTnc3X2amT0PvAusAwqBocAuwCgz
u5TwF4C6/YjURu6hhcCQITz//Gh2cKdgvyN49PAcpu19CAAlZeURF5m+4uot4+63ALdscfh7wiheRGqj8vLQxXHIEHjvPcjMZNRx5/LvAztQnLnHZi9tmpkRUZHpT1eoikhifPEFXHcdNGsGvXvD+vUwdCgsWkSDe+/m68abT79k1KtD/46tIyo2/akrpIhUnXu4svS++2D8+HAsJwf69YMTTwSzcKhtAwBy84soKSunaWYG/Tu21nx7NVK4i8jPt3Jl6Np4//0wZw786lcwYAD84Q+w996VviWnbZbCvAYp3EVk+332GTzwQFifXlYGbduG+z16QIbmz5OJwl1Efpo7TJwYfiB9+WXYYYewSXW/fnDssZumXiS5KNxFpHIrVoT2u0OGQFERNG4MN94IfftClqZXkp3CXUQ299FHYerl8cdDwB9+eAj57t2hfv2oq5PtpHAXEdiwASZMCKP0CROgXr0Q5v36wZFHRl2dVIHCXaQ2W748jNAfeAA++QT22AP++lf4v/8L9yVlKdxFaqN588Iyxry8sKzx6KPhttvCD6U77hh1dZIACneR2mL9+rDaZcgQeO21EOLnnx+mXg47LOrqJMEU7iLp7ptv4LHH4N//hvnzw0qXQYPgssvCChhJSwp3kXQ1e3YYpY8YEZp5HX88/P3voT1AvXpRVyfVTOEukk7WrQs9XoYMCT1fdtoJLrwwTL20aRN1dVKDFO4i6WDpUnj00TD1snBh6O/y97/DpZeGvi9S6yjcRVJZYWEYpY8cCd9/D+3ahQ6NnTtDnTpRVycRUriLpJq1a2HMmBDq77wDDRrAxRfDlVfCwQdHXZ0kCYW7SKr46quw+cVDD0FJCbRsCf/8J1xyCey6a9TVSZJRuIsku+nTwyj92WdhzRro0AEefhhOP11TL7JVCneRZLRmDTz3XAj1adNgl13CuvQrr4QDDoi6OkkBCneRZLJ4cZh2efjhMA3TqhXce2+YU2/YMOrqJIUo3EWi5g5Tp4ZR+nPPhbXqZ5wR1qZ36BA2xxD5meIKdzPLBB4FDgYc+D1QBDwLNAc+B7q7+zfxnEckLa1eHebRhwyBmTPDyPzKK+GKK2C//aKuTlJcvEOCe4EJ7n4A0AaYB1wPFLh7K6Ag9lhENiouDjsa7bVXmG5ZtSq03F20CO65R8EuCVHlkbuZNQROAC4GcPc1wBoz6wKcFHtZHvAGcF08RYqkPHd4++1wgdHYsWFzjM6dw9RL+/bah1QSLp5pmZZAKfC4mbUBZgJXAU3cfTGAuy82s90re7OZ9QH6AOy9995xlCGSxMrLw9WjQ4bAe+9BZiZccw1cfjm0aBF1dZLG4pmWqQscCjzo7m2BlfyMKRh3H+ru2e6e3VhtRyXdfPEFXHcdNGsGvXuHkfrQoWHqJTdXwS7VLp6RezFQ7O7TYo+fJ4T7V2a2Z2zUviewJN4iRVKCO7z+ehiljx8fjuXkwB//CCecoKkXqVFVHrm7+5fAQjNrHTvUHvgAGA/0ih3rBbwQV4UiyW7lyrA2/ZBDwvz5W2/BgAFhY4zRo+HEExXsUuPiXefeD3jKzHYEPgMuIfyFMcrMLgUWAN3iPIdIcvrss7DKZdgwKCuDtm3D/R49ICMj6uqklosr3N19FpBdyVPt4/lckaTlDhMnhqmXl18OFxide25Y9XLMMRqhS9LQFaoi22PFCsjLg/vvh6Ii2H33sFa9b9+wJ6lIklG4i/yUjz4KgT58eAj4ww+HJ56A7t2hfv2oqxPZKoW7yJY2bIAJE8LUy4QJYTPp7t3D1MuRR0Zdnch2UbiLbLR8OTz+ePiR9JNPYI894NZboU+fcF8khSjcRT74IEy9PPFEWNZ49NFw221wzjmw445RVydSJQp3qZ3Wr4eXXgpTLwUFIcTPPz9MvRx2WNTVicRN4S61y7JlYS36Aw/A55+H9gCDBoVdjtQGQ9KIwl3S0rjCReTmF1FSVk7TzAxub7mBk18bBSNGhGZexx8f
erzk5EBd/TGQ9KP/qiXtjCtcxMAxs1nz/Ro6fjyVi2e+yFEL57C+fn3q9OwZpl7atIm6TJFqpXCXtJObX0TTxZ8zdOwg9l1WTHHD3bnzpIuZfNxZ/Of2s6MuT6RGKNwl7bSeOZl7x/+D1XXr0zdnIK+2OooNO9TB1kZdmUjNUbhL+nCH3FweHX0bH+zeksu6/oXFDX/4kbRpppp5Se2hcJf0sHp1WPEyYgQlp3bmd4f+nm+ot+npjHp16N+x9U98gEh6iXeDbJHolZSEnukjRsDtt9Ms/wVuOe9wsjIzMCArM4M7ux5CTls1+JLaQyN3SW3Tp4fljMuXw5gxcHb4wTSnbZbCXGo1jdwldY0cGdar16sHU6ZsCnYRUbhLKtqwAQYOhAsvDF0ap0+H3/wm6qpEkoqmZSS1fPst9OwJL74YujUOGaLmXiKVULhL6vj0UzjrrLAT0v33w+WXa1s7ka1QuEtqmDQJusX2Wn/1VTj55GjrEUlymnOX5OYeOjh26BA2zPjf/xTsIttB4S7Ja80a+MMf4Mor4fTT4b//hX33jboqkZQQd7ibWR0zKzSzl2KPdzOziWb2cex21/jLlFqntBROPRUefhiuvx7GjYOGDaOuSiRlJGLkfhUwr8Lj64ECd28FFMQei2y/99+HI44IUzBPPQV33gl16kRdlUhKiSvczawZ0Al4tMLhLkBe7H4ekBPPOaSWGTsWjjkmTMlMngwXXBB1RSIpKd6R+7+AAcCGCseauPtigNjt7pW90cz6mNkMM5tRWloaZxmS8tzhjjuga1c46KBwYdLhh0ddlUjKqnK4m9mZwBJ3n1mV97v7UHfPdvfsxtq7snZbtQrOOw9uuilcoPTmm9C0adRViaS0eNa5HwucZWZnADsBDc1sBPCVme3p7ovNbE9gSSIKlTS1cCF06QKzZsE//gF//rMuTBJJgCqP3N19oLs3c/fmQA9gkrv3BMYDvWIv6wW8EHeVkp6mTAlTL59+Ci+9BP37K9hFEqQ61rkPBk41s4+BU2OPRTY3fDi0awe77AJTp8IZZ0RdkUhaSUj7AXd/A3gjdv9roH0iPlfS0Lp1MGAA3HMPtG8Po0bBbrtFXZVI2lFvGak5ZWXQowfk50O/fnD33VBX/wmKVAf9yZKaUVQUOjrOnw9Dh4b9TkWk2ijcpfrl54eljjvuCAUFYfckEalWahwm1cc9TL2ccQbss0+4MEnBLlIjNHKXhBpXuIjc/CKWLv2Wu994iE4z88NVp3l5YWWMiNQIhbskzLjCRQwcM5tdvlnKyLGDOKzkQ+4//kKa3XAnOQp2kRqlcJeEyZ3wIae+V8DNBY/QYO1q/tDlev5zwHFkTfyYnMP2iro8kVpF4S6J8cUX3PHodbT7bCaz9tyfAaf/kY8aNwegpKw82tpEaiGFu8Rn/fqwWfWNN3Lk2vXc2v4y8g49kw07/NB/vWlmRoQFitROWi0jVTd7dui9fvXVcMIJvDXmDZ45uutmwZ5Rrw79O7aOrkaRWkrhLj/f6tXwl7/AoYfCZ5+F3ZJefpmOnY7kzq6HkJWZgQFZmRnc2fUQctpmRV2xSK2jaRn5eSZPDleXfvQRXHQR/POf0KjRpqdz2mYpzEWSgEbusn2WL4e+feHEE2Ht2nDVaV7eZsEuIslD4S7bNnYs/PrX8MgjcO21Ya69Q4eoqxKRn6BpGdm6kpLQvXHMGGjTBsaPh+zsqKsSke2gkbv82IYNoXPjgQfCK6/A4MGhL4yCXSRlaOQumysqgj59wg+n7dqFkN9vv6irEpGfSSN3CdasgUGDwvTL++/DY4+F9rwKdpGUpJG7wLRp0Ls3zJkD3bvDvffCHntEXZWIxEEj99rsu+/C1aVHHx22wBs/Hp59VsEukgY0cq+t/vOfsG594UK4/HL429+gYcOoqxKRBNHIvbYpLYULLwy7I+28M7z9dmj8pWAXSStVDncz28vMXjezeWY218yuih3fzcwmmtnHsdtdE1eu
VJk7PPFEuBjpuefgr3+FwsLQ+EtE0k48I/d1wLXu/mvgKOAKMzsQuB4ocPdWQEHssURp/nzo2BF69YL994dZs+CWW6B+/agrE5FqUuVwd/fF7v5u7P4KYB6QBXQB8mIvywNy4qxRqmrdutDY6+CDYepUeOCBMA1z4IFRVyYi1SwhP6iaWXOgLTANaOLuiyH8BWBmu2/lPX2APgB77713Isqo1TZuTF1SVk7TzAzuaL6OdnfdADNnQufOIdj30lZ3IrVF3OFuZrsAo4Gr3f1bM9uu97n7UGAoQHZ2tsdbR222cWPq8rXrqb/2e3qOG87x/xvD6t1+xU6jRsG558J2/v8iIukhrnA3s3qEYH/K3cfEDn9lZnvGRu17AkviLVJ+Wm5+EavXrKXDx9O44fVhNC9bzDO/6cDwnMuZ0K1L1OWJSASqHO4WhuiPAfPc/e4KT40HegGDY7cvxFWh/LTVqznxjbFcOn0s+y5bxGe7NuX8HoP47z5tsO+jLk5EohLPyP1Y4HfAbDObFTt2AyHUR5nZpcACoFtcFUrlvv4aHnwQhgzhb0uWMLvJvvTr3J9XDjiO9bE9TLUxtUjtVeVwd/e3ga1N5Lav6ufKNsyfD/fcExp7rVoFp5/O2zkXc9mCX1C+bsOml2ljapHaTVeopooZM+C880KXxocegm7dwo5Ir7zCcX26c+c5v9HG1CKyiXrLJDP30AMmNxfeeCO0CLj2WrjqKsjaPLi1MbWIVKRwT0Zr1sDIkXDXXTB3bgjy3NywiYZ6wIjIdlC4J5Ply+Hhh0M/9ZISOOSQ0A/mvPNgxx2jrk5EUojCPRksXAj/+hc88gisWAHt28OwYdChgy4+EpEqUbhH6f33w3TLM8+E+fXu3eHPf4ZDD426MhFJcQr3muYe9ibNzYVXXw091a+4IuyI1Lx51NWJSJpQuNeUtWtDH/Xc3NByd489wu5HffvCrmp5LyKJpXCvbitWwKOPhjn1BQvggAPC45491U9dRKqNwr26LF4M990XLjgqK4Pjjw/b2XXqBDvo2jERqV4K9zht2Uf91lY7cMorT8KIEWGzjK5dw4+kRx4ZdakiUoso3OOwqY/6mnUcUTyXPs+P5pRPp7Nup52o27s3/OlPsO++UZcpIrWQwr2qSksp/PuD3DZ3Gsd//i57fLeMrzMacs+xFzCx3bm8cvvZUVcoIrWYwn17rVkDU6aE5Yv5+fDuu9wKlO20C2/v81vebHkYL/76eFbX2wlbG3WxIlLbKdy3xh2KikKYv/pqaNy1ciXUrQtHHw23307vxbsxaedmbIj1T99IfdRFJGoK94qWLQsXGG0M9AULwvH99oNevUI7gHbtNjXvOrNwEe/E9i7dSH3URSQZ1O5wX7sWpk79IcynTw8j9l/+MvR3ueEGOPVUaNmy0rdvbLFbcbVM/46t1XpXRCJXu8LdHT799Id589dfDxcZ7bBDWKp4881hdH7EEWH6ZTuoj7qIJKP0D/eyMpg06YfR+fz54fg++8D550PHjnDyyZCZGWWVIiIJldLhvuUFRP07tibnkCZhemVjmE+bBuvXwy67hBC/9towOt9vP7XTFZG0lbLhvukCorXraVb2JSfMKqRB3izWFs+m3nffhuDOzoaBA0OYH3UU1KsXddkiIjUiZcM9N7+I/RZ8yL0v5tLymxIASn7RiAkHHEvn/heHH0R/9atoixQRiUi1hbuZnQbcC9QBHnX3wYn8/JKyclY3bMznuzblyUM7MbnFoXy6WzPMjM7dOyXyVCIiKadawt3M6gAPAKcCxcB0Mxvv7h8k6hxNMzNYBPy+219/dFxEpLarrt6zRwCfuPtn7r4GeAboksgT9O/Ymox6m18ZqguIRESC6gr3LGBhhcfFsWObmFkfM5thZjNKS0t/9gly2mZxZ9dDyMrMwICszAzu7HqI1pyLiFB9c+6VrTH0zR64DwWGAmRnZ3slr98mXUAkIlK56hq5FwN7VXjcDCippnOJiMgW
qivcpwOtzKyFme0I9ADGV9O5RERkC9UyLePu68zsSiCfsBRymLvPrY5ziYjIj1XbOnd3fwV4pbo+X0REtq66pmVERCRC5l6lhSqJLcKsFPgi6jri1AhYGnURSUTfx+b0ffxA38Xm4vk+9nH3xpU9kRThng7MbIa7Z0ddR7LQ97E5fR8/0Hexuer6PjQtIyKShhTuIiJpSOGeOEOjLiDJ6PvYnL6PH+i72Fy1fB+acxcRSUMauYuIpCGFu4hIGlK4x8nM9jKz181snpnNNbOroq4pamZWx8wKzeylqGuJmpllmtnzZvZh7L+Ro6OuKUpmdk3sz8kcM3vazHaKuqaaZGbDzGyJmc2pcGw3M5toZh/HbndNxLkU7vFbB1zr7r8GjgKuMLMDI64palcB86IuIkncC0xw9wOANtTi78XMsoA/AtnufjCh71SPaKuqccOB07Y4dj1Q4O6tgILY47gp3OPk7ovd/d3Y/RWEP7y1tsm8mTUDOgGPRl1L1MysIXAC8BiAu69x97JIi4peXSDDzOoCDahlrcDdfTKwbIvDXYC82P08ICcR51K4J5CZNQfaAtMiLiVK/wIGABsiriMZtARKgcdj01SPmtnOURcVFXdfBNwFLAAWA8vd/dVoq0oKTdx9MYTBIrB7Ij5U4Z4gZrYLMBq42t2/jbqeKJjZmcASd58ZdS1Joi5wKPCgu7cFVpKgf3KnothcchegBdAU2NnMekZbVfpSuCeAmdUjBPtT7j4m6noidCxwlpl9TtgU/WQzGxFtSZEqBordfeO/5J4nhH1tdQow391L3X0tMAY4JuKaksFXZrYnQOx2SSI+VOEeJzMzwpzqPHe/O+p6ouTuA929mbs3J/xQNsnda+3IzN2/BBaaWevYofbABxGWFLUFwFFm1iD256Y9tfgH5grGA71i93sBLyTiQ6tts45a5Fjgd8BsM5sVO3ZDbLMSkX7AU7HtJj8DLom4nsi4+zQzex54l7DKrJBa1orAzJ4GTgIamVkxcAswGBhlZpcS/gLslpBzqf2AiEj60bSMiEgaUriLiKQhhbuISBpSuIuIpCGFu4hIGlK4i4ikIYW7iEga+n/T75OVL7lXhwAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.scatter(x,y1)\n",
    "plt.plot(x,linear1(x),'r-')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0ae273c1",
   "metadata": {},
   "source": [
    "检测与处理异常值-3西格玛"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "d48d21b0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>用户编号</th>\n",
       "      <th>年龄</th>\n",
       "      <th>性别</th>\n",
       "      <th>居住类型</th>\n",
       "      <th>编号</th>\n",
       "      <th>每月支出</th>\n",
       "      <th>是否愿意下载</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>30.0</td>\n",
       "      <td>男</td>\n",
       "      <td>城市</td>\n",
       "      <td>1</td>\n",
       "      <td>4780.45</td>\n",
       "      <td>Yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>3</td>\n",
       "      <td>-3.2</td>\n",
       "      <td>男</td>\n",
       "      <td>农村</td>\n",
       "      <td>3</td>\n",
       "      <td>5011.06</td>\n",
       "      <td>Yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>5</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>男</td>\n",
       "      <td>农村</td>\n",
       "      <td>5</td>\n",
       "      <td>4899.04</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10</td>\n",
       "      <td>23.0</td>\n",
       "      <td>男</td>\n",
       "      <td>城市</td>\n",
       "      <td>10</td>\n",
       "      <td>6816.02</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>11</td>\n",
       "      <td>-2.4</td>\n",
       "      <td>男</td>\n",
       "      <td>城市</td>\n",
       "      <td>11</td>\n",
       "      <td>7746.90</td>\n",
       "      <td>Yes</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   用户编号    年龄 性别 居住类型  编号     每月支出 是否愿意下载\n",
       "0     1  30.0  男   城市   1  4780.45    Yes\n",
       "1     3  -3.2  男   农村   3  5011.06    Yes\n",
       "2     5  -1.0  男   农村   5  4899.04     No\n",
       "3    10  23.0  男   城市  10  6816.02     No\n",
       "4    11  -2.4  男   城市  11  7746.90    Yes"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "all = pd.read_csv('tmp/all2.csv')\n",
    "all.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "46868a11",
   "metadata": {},
   "outputs": [],
   "source": [
    "a = all['年龄'].mean()\n",
    "b = all['年龄'].std()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "b34259e4",
   "metadata": {},
   "outputs": [],
   "source": [
    "c = all['年龄'].apply(lambda x : x>a+3*b or x<a-3*b)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "753b62b3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1      -3.2\n",
       "2      -1.0\n",
       "4      -2.4\n",
       "18     -5.0\n",
       "951    -1.3\n",
       "956    -1.8\n",
       "1092   -1.4\n",
       "Name: 年龄, dtype: float64"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "all.loc[c,'年龄']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dc3e546a",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:base] *",
   "language": "python",
   "name": "conda-base-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
